# Folder that receives every document rendered in this section.
wd_other_output <- paste0(wd_code, "06_figures_03_04_table_04_IV/other output/")

# Rescale the listed outcome/score columns by a factor of 100 before the
# documents are knitted.
data_regression <- data_student_raw %>%
  mutate(
    across(
      c(grad_perc, entrance_perc, school_mean, class_mean, grad_perc_0),
      ~ .x * 100
    )
  )

# knit IV main table and all graphs
# The .Rmd files are looked up next to the script currently open in RStudio.
current_path <- rstudioapi::getActiveDocumentContext()$path
setwd(dirname(current_path))

for (rmd_file in c(
  "06_figures_03_04_table_04.Rmd",           # figures 3, 4 and table 4
  "06_regression_and_figure_robustness.Rmd", # robustness checks: appendix
  "06_regression_and_other_outcomes.Rmd",    # other outcomes: appendix
  "06_regression_overidentification.Rmd"     # overidentification: appendix
)) {
  rmarkdown::render(rmd_file, output_dir = wd_other_output)
}

# Return to the code directory once everything is rendered.
setwd(wd_code)
# #Set memory and load packages
# #invisible(utils::memory.limit(64000))
# pacman::p_load(tidyverse,ggplot2,dplyr,fixest,modelsummary,bookdown,forcats,Hmisc,tibble,modelsummary)
#
#
# wd<-('C:/Users/MUNTEANU_A/Dropbox/Research/2018 JMP')
# #wd<-('C:/Users/Andrei/Google Drive/Research/20190300 Romania BAC')
# wd_data<-paste0(wd,'/data/final/')
# setwd(wd_data)
# openings<-readRDS('openings_anon')
# add instruments for Heckman; the proportion of dropouts in your school
# Start from the raw student data and keep only the columns used downstream.
# Each column is listed once; the original listed dec_town and town_hs_bac
# twice, which select() ignores.
data_regression <- data_student_raw %>%
  dplyr::select(
    judet_bac, judet_adm, judet_ms, id_bac, media_la_admitere, id_adm,
    dec_town, entrance_perc, n_hs_town_group, n_students_town_yr, n_hs_town,
    town, an, grad_perc, class_mean, school_mean, school_mean_yr,
    class_mean_yr, school_change, scoala_de_provenienta,
    unitate_de_invatamant, liceu_repartizat,
    school_harmonized, specializare_bac2, specializare_adm,
    Unemployment_hs_bac, Wages_hs_bac, drop_hs_hs_bac,
    town_hs_bac, Cod_SIRUTA_hs_bac, drop_middle_ms_adm, drop_hs_ms_adm,
    Cod_SIIIR_hs_bac
  )
# Project directories (all derived from the project root `wd`).
wd <- "C:/Users/MUNTEANU_A/Dropbox/Research/2018 JMP/"
wd_code <- paste0(wd, "code/replication/")
wd_data_intermediate <- paste0(wd, "data/intermediate/")
wd_data_final <- paste0(wd, "data/final/")
setwd(wd_data_final)

# Column-name patterns shared by the loaders below. matches() treats them as
# regular expressions, so every alternative is a substring match on the name.
cols_student <- "SIRUTA|SIIIR|judet|^an|liceu|school_h|scoala_de|town|specializare|id_|unitate|n_|dist|rezultat|school_change|entrance_|grad_|class_|school_|dec|quart|med|Wages_hs|drop|Unemployment_hs"
cols_teacher <- paste0(cols_student, "|teacher|County|Year")
cols_expenditure <- "town|judet|ValoareEUR|^an|Type|unitate"
cols_student_expenditure <- paste0("Exp|", cols_student, "|Exp")
cols_full <- paste0(
  cols_teacher,
  "|Exp|ValoareEUR|Type|subject|mandatory|elective|disciplina|lb_romana|gpa|Exeprience|Category"
)
# Columns removed again after the keep-pattern has been applied.
cols_drop_derived <- "cls|scl|_ID_|opening"
cols_drop_teacher <- "_Level|Certification|Hometown|Long|County\\.|County_|ID|Inspection"

# Each data set is read from the final-data folder, reduced to the matching
# columns and ungrouped.
data_student_raw <- readRDS("data_student_anon") %>%
  select(matches(cols_student, perl = TRUE)) %>%
  select(-matches(cols_drop_derived, perl = TRUE)) %>%
  ungroup()

data_teacher_raw <- readRDS("data_teacher_anon") %>%
  select(matches(cols_teacher, perl = TRUE)) %>%
  select(-matches(cols_drop_teacher, perl = TRUE)) %>%
  ungroup()

data_exp_raw <- readRDS("data_expenditure_anon") %>%
  select(matches(cols_expenditure, perl = TRUE)) %>%
  ungroup()

data_student_teacher_raw <- readRDS("data_student_teacher_anon") %>%
  select(matches(cols_teacher, perl = TRUE)) %>%
  select(-matches(cols_drop_derived, perl = TRUE)) %>%
  ungroup()

data_student_exp_raw <- readRDS("data_student_expenditure_anon") %>%
  select(matches(cols_student_expenditure, perl = TRUE)) %>%
  select(-matches(cols_drop_derived, perl = TRUE)) %>%
  ungroup()

data_student_teacher_exp_raw <- readRDS("data_student_teacher_expenditure_anon") %>%
  select(matches(cols_full, perl = TRUE)) %>%
  select(-matches(cols_drop_derived, perl = TRUE)) %>%
  ungroup()

# The openings table is loaded without any column filtering.
openings <- readRDS("openings_anon")
# #Set memory and load packages
# #invisible(utils::memory.limit(64000))
# pacman::p_load(tidyverse,ggplot2,dplyr,fixest,modelsummary,bookdown,forcats,Hmisc,tibble,modelsummary)
#
#
# wd<-('C:/Users/MUNTEANU_A/Dropbox/Research/2018 JMP')
# #wd<-('C:/Users/Andrei/Google Drive/Research/20190300 Romania BAC')
# wd_data<-paste0(wd,'/data/final/')
# setwd(wd_data)
# openings<-readRDS('openings_anon')
# add instruments for Heckman; the proportion of dropouts in your school
# Regression sample: selected columns of the raw student data.
data_regression <- data_student_raw %>%
  dplyr::select(
    judet_bac, judet_adm, judet_ms, id_bac, media_la_admitere, id_adm,
    dec_town, entrance_perc, n_hs_town_group, n_students_town_yr, n_hs_town,
    town, an, grad_perc, class_mean, school_mean, school_mean_yr,
    class_mean_yr, school_change, scoala_de_provenienta,
    unitate_de_invatamant, liceu_repartizat,
    school_harmonized, specializare_bac2, specializare_adm,
    Unemployment_hs_bac, Wages_hs_bac, drop_hs_hs_bac,
    town_hs_bac, Cod_SIRUTA_hs_bac, drop_middle_ms_adm, drop_hs_ms_adm,
    Cod_SIIIR_hs_bac
  )
gc()
data_regression <- as.data.frame(data_regression)

# Bucket the number of high schools per town: counts below 4 keep their own
# label, 4 to 15 become "4-15", anything above 15 becomes "16+".
data_regression$n_hs_town_group <- ifelse(
  data_regression$n_hs_town > 15,
  "16+",
  ifelse(
    data_regression$n_hs_town >= 4 & data_regression$n_hs_town <= 15,
    "4-15",
    as.character(data_regression$n_hs_town)
  )
)
# Turn the labels into a factor whose levels are ordered by n_hs_town.
data_regression$n_hs_town_group <- reorder(
  data_regression$n_hs_town_group,
  data_regression$n_hs_town
)

data_regression <- data_regression %>%
  # number of distinct harmonized schools per Cod_SIRUTA_hs_bac
  # (counted before the year filter below)
  group_by(Cod_SIRUTA_hs_bac) %>%
  mutate(n_school = length(unique(school_harmonized))) %>%
  ungroup() %>%
  as.data.frame() %>%
  # fixed-effect key: town x school of origin
  mutate(fe = paste0(town, ":", scoala_de_provenienta)) %>%
  filter(an <= 2019) %>%
  # quartile / decile bins of entrance_perc; intervals are closed on the left
  mutate(
    quart = cut(
      entrance_perc,
      breaks = c(-Inf, 0.25, 0.5, 0.75, Inf),
      labels = c("1", "2", "3", "4"),
      right = FALSE
    ),
    dec = cut(
      entrance_perc,
      breaks = c(-Inf, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, Inf),
      labels = c("1", "2", "3", "4", "5", "6", "7", "8", "9", "10"),
      right = FALSE
    )
  )
#' Percentile rank of each element of a numeric vector
#'
#' Divides the (average-tie) rank of every value by the number of non-missing
#' values, so the largest value maps to 1.
#'
#' @param x A numeric vector.
#' @return A numeric vector the same length as `x`. Missing inputs stay `NA`
#'   and are not counted in the denominator; for vectors without `NA` the
#'   result equals `rank(x) / length(x)`.
perc.rank <- function(x) {
  n_observed <- sum(!is.na(x))
  # na.last = "keep" stops NAs from being ranked after the observed values.
  #y<-trunc(rank(ifelse(is.na(x),0,x)))/length(x)
  #y<-(y-min(y))/(max(y)-min(y))
  rank(x, na.last = "keep") / n_observed
}
# Percentile rank of the admission grade within each (year, town) cell.
# Rows without an admission grade keep NA; only the new column is written
# (the other columns of those rows are unchanged by the ranking step).
data_regression$entrance_perc_town <- NA
data_regression$entrance_perc_town[!is.na(data_regression$media_la_admitere)] <-
  data_regression %>%
  filter(!is.na(media_la_admitere)) %>%
  group_by(an, town_hs_bac) %>%
  mutate(entrance_perc_town = perc.rank(media_la_admitere)) %>%
  pull(entrance_perc_town)

# Quartile and decile bins of the within-town rank (left-closed intervals).
data_regression <- data_regression %>%
  mutate(
    quart_town = cut(
      entrance_perc_town,
      breaks = c(-Inf, 0.25, 0.5, 0.75, Inf),
      labels = c("1", "2", "3", "4"),
      right = FALSE
    ),
    dec_town = cut(
      entrance_perc_town,
      breaks = c(-Inf, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, Inf),
      labels = c("1", "2", "3", "4", "5", "6", "7", "8", "9", "10"),
      right = FALSE
    )
  )
####
# Presence grid: one row per school, one column per year (`an`). A cell is
# TRUE when the school has at least one row in data_regression that year and
# NA otherwise.
school_timeline <- data_regression %>%
  filter(an <= 2019) %>%
  group_by(judet_bac, town_hs_bac, an, Cod_SIIIR_hs_bac) %>%
  summarise(exists = n() > 0) %>%
  # summarize(exists=sum(!is.na(media_la_admitere))>10) %>%
  spread(an, exists)

# replace NA's with 0's
# mutate_all() touches the identifier columns too, so a missing town becomes
# 0 and is filtered out further down. The year columns are gathered by
# excluding the identifiers rather than by position, so the reshape does not
# depend on how many years are present.
school_timeline_long <- school_timeline %>%
  ungroup() %>%
  mutate_all(~ replace(., is.na(.), 0)) %>%
  gather(an, exists, -c(judet_bac, town_hs_bac, Cod_SIIIR_hs_bac)) %>%
  arrange(town_hs_bac, Cod_SIIIR_hs_bac)

# Fill gaps: a year in which the school is absent counts as present when the
# school is present in both neighbouring years. The boundary value has to be
# passed as `default =`; given positionally it is read as the shift `n`, which
# disables the fill for every school that is absent in its last year.
school_timeline <- school_timeline_long %>%
  group_by(judet_bac, town_hs_bac, Cod_SIIIR_hs_bac) %>%
  mutate(
    exists = ifelse(
      exists == 0 &
        lag(exists == 1, default = first(exists) == 1) &
        lead(exists == 1, default = last(exists) == 1),
      1,
      exists
    )
  ) %>%
  spread(an, exists) %>%
  arrange(town_hs_bac, Cod_SIIIR_hs_bac)

# Number of schools per town and year, one column per year.
schools_town_timeline <- school_timeline %>%
  gather(an, exists, -c(judet_bac, town_hs_bac, Cod_SIIIR_hs_bac)) %>%
  group_by(judet_bac, town_hs_bac, an) %>%
  summarise(n_hs_town = sum(exists)) %>%
  spread(an, n_hs_town) %>%
  arrange(judet_bac, town_hs_bac)

# Towns whose school count equals `n_hs` in every year from 2008 to 2019,
# excluding the placeholder town 0 created by the NA replacement above.
.towns_with_constant_n_hs <- function(timeline, n_hs) {
  timeline %>%
    filter(
      `2008` == n_hs & `2009` == n_hs & `2010` == n_hs & `2011` == n_hs &
        `2012` == n_hs & `2013` == n_hs & `2014` == n_hs & `2015` == n_hs &
        `2016` == n_hs & `2017` == n_hs & `2018` == n_hs & `2019` == n_hs
    ) %>%
    filter(town_hs_bac != 0)
}

n_hs <- 1
towns_1 <- .towns_with_constant_n_hs(schools_town_timeline, n_hs)
n_hs <- 2
towns_2 <- .towns_with_constant_n_hs(schools_town_timeline, n_hs)
# Attach the openings table by year and town. The join is done exactly once:
# joining the same table a second time would add every openings column again
# with .x/.y suffixes, and `all.x` is an argument of base::merge(), not of
# dplyr::left_join().
data_regression <- dplyr::left_join(
  data_regression,
  openings,
  by = c("an", "town_hs_bac")
)
# #regression
# Render from the folder of the script currently open in RStudio.
current_path <- rstudioapi::getActiveDocumentContext()$path
setwd(dirname(current_path))
rmarkdown::render("ddd.Rmd", knit_root_dir = getwd())
# Load Data
# Working copy of the raw student data with the high-school-count bucket:
# counts below 4 keep their own label, 4 to 15 become "4-15", above 15 "16+".
data <- data_student_raw
data$n_hs_town_group <- ifelse(
  data$n_hs_town > 15,
  "16+",
  ifelse(
    data$n_hs_town >= 4 & data$n_hs_town <= 15,
    "4-15",
    as.character(data$n_hs_town)
  )
)
# Factor levels ordered by n_hs_town.
data$n_hs_town_group <- reorder(data$n_hs_town_group, data$n_hs_town)
#
pacman::p_load(openxlsx)
# Load pop data
setwd(wd_data_final)
pop <- read.xlsx("Population.xlsx")
# Lookup from SIRUTA code to its SIRSUP code, restricted to rows with MED == 3.
siruta <- read.xlsx("SIRUTA2-3.xlsx") %>%
  filter(MED == 3) %>%
  select(SIRUTA, SIRSUP)
###
# Left-merge the lookup; where no SIRSUP is found, fall back to the school's
# own code. Then left-merge the population table on that code and rename it.
data <- base::merge(
  data, siruta,
  by.x = "Cod_SIRUTA_hs_bac", by.y = "SIRUTA", all.x = TRUE
)
data <- mutate(data, SIRSUP = ifelse(is.na(SIRSUP), Cod_SIRUTA_hs_bac, SIRSUP))
data <- base::merge(
  data, pop,
  by.x = "SIRSUP", by.y = "SIRUTA", suffixes = c("", "_pop"), all.x = TRUE
)
data <- rename(data, Cod_SIRUTA2_pop = SIRSUP)
# Summarise already-grouped student rows into one row per group: number of
# distinct schools, group means of the covariates, and the group mean of each
# yearly population column `1992`..`2019` (stored as pop_1992..pop_2019).
# The grouping left by summarise() is intentionally not changed here.
.summarise_town_covariates <- function(grouped_data) {
  pop_years <- as.character(1992:2019)
  grouped_data %>%
    summarise(
      n_hs_town = length(unique(unitate_de_invatamant)),
      n_students_town_yr = mean(n_students_town_yr, na.rm = TRUE),
      Wages_hs_bac = mean(Wages_hs_bac, na.rm = TRUE),
      Unemployment_hs_bac = mean(Unemployment_hs_bac, na.rm = TRUE),
      drop_hs_hs_bac = mean(drop_hs_hs_bac, na.rm = TRUE),
      across(
        all_of(pop_years),
        ~ mean(.x, na.rm = TRUE),
        .names = "pop_{.col}"
      )
    )
}

# Columns averaged row by row into pop_2008_2019.
pop_cols_2008_2019 <- paste0("pop_", 2008:2019)

# Only rows with no school change and a non-empty school name enter the
# town summaries.
data_named_school <- data %>%
  filter(
    school_change == FALSE &
      unitate_de_invatamant != "" &
      !is.na(unitate_de_invatamant)
  )

# One row per year x county x town.
data_town <- data_named_school %>%
  group_by(an, judet_bac, town_hs_bac) %>%
  .summarise_town_covariates() %>%
  arrange(judet_bac, town_hs_bac, an) %>%
  rowwise() %>%
  mutate(
    pop_2008_2019 = mean(c_across(all_of(pop_cols_2008_2019)), na.rm = TRUE)
  )

# One row per county x town, pooling all years.
data_town2 <- data_named_school %>%
  group_by(judet_bac, town_hs_bac) %>%
  .summarise_town_covariates() %>%
  arrange(judet_bac, town_hs_bac) %>%
  rowwise() %>%
  mutate(
    pop_2008_2019 = mean(c_across(all_of(pop_cols_2008_2019)), na.rm = TRUE)
  )
###
# NOTE(review): wd_replication is not defined in the visible part of this
# file, and the directory is replaced two lines below anyway - confirm it is
# still needed.
setwd(wd_replication)
current_path <- rstudioapi::getActiveDocumentContext()$path
setwd(dirname(current_path))
rmarkdown::render("determinants.Rmd", knit_root_dir = getwd())
# load package for endogenous market graph between schools
pacman::p_load(igraph)
# load data
# setwd(wd_data_final)
# clean data
# ms_drop_perc: share of rows with a missing id_bac within each
#   (year, school of origin) cell.
# drop: 1 when id_bac is missing, 0 otherwise.
data_regression <- data_student_raw %>%
  group_by(an, scoala_de_provenienta) %>%
  mutate(
    ms_drop_perc = sum(is.na(id_bac)) / n(),
    drop = ifelse(is.na(id_bac), 1, 0)
  ) %>%
  dplyr::select(
    judet_bac, judet_adm, judet_ms, id_bac, media_la_admitere, id_adm,
    drop, ms_drop_perc, dec_town, entrance_perc, n_hs_town_group,
    n_students_town_yr, n_hs_town,
    town, an, grad_perc, class_mean, school_mean, school_change,
    scoala_de_provenienta, unitate_de_invatamant, liceu_repartizat,
    school_harmonized, specializare_bac2, specializare_adm,
    Unemployment_hs_bac, Wages_hs_bac, drop_hs_hs_bac,
    town_hs_bac, Cod_SIRUTA_hs_bac, drop_middle_ms_adm, drop_hs_ms_adm
  ) %>%
  as.data.frame()

# High-school-count bucket: counts below 4 keep their own label, 4 to 15
# become "4-15", above 15 "16+"; levels ordered by n_hs_town.
data_regression$n_hs_town_group <- ifelse(
  data_regression$n_hs_town > 15,
  "16+",
  ifelse(
    data_regression$n_hs_town >= 4 & data_regression$n_hs_town <= 15,
    "4-15",
    as.character(data_regression$n_hs_town)
  )
)
data_regression$n_hs_town_group <- reorder(
  data_regression$n_hs_town_group,
  data_regression$n_hs_town
)

data_regression <- data_regression %>%
  # distinct harmonized schools per Cod_SIRUTA_hs_bac, counted before the
  # year filter
  group_by(Cod_SIRUTA_hs_bac) %>%
  mutate(n_school = length(unique(school_harmonized))) %>%
  ungroup() %>%
  as.data.frame() %>%
  mutate(fe = paste0(town, ":", scoala_de_provenienta)) %>%
  filter(an <= 2019)

# Untouched copy of the prepared sample.
data_regression_orig <- data_regression
#knit IV other specifications for appendix (different controls)
#track
# Render the document once, from the folder of the script currently open in
# RStudio. A second identical render and a getwd() probe were removed: they
# only repeated the same work.
current_path <- rstudioapi::getActiveDocumentContext()$path
setwd(dirname(current_path))
rmarkdown::render("Endogenous_Markets.Rmd", knit_root_dir = getwd())
#subset variables
# Reduce every anonymised data set to the columns used by the analysis and
# write the result back to the file it was read from.
# WARNING: this overwrites the input files in wd_data_final.
setwd(wd_data_final)

# Column-name patterns (regular expressions, matched against column names).
cols_student <- "SIRUTA|SIIIR|judet|^an|liceu|school_h|scoala_de|town|specializare|id_|unitate|n_|dist|rezultat|school_change|entrance_|grad_|class_|school_|dec|quart|med|Wages_hs|drop|Unemployment_hs"
cols_teacher <- paste0(cols_student, "|teacher|County|Year")
cols_expenditure <- "town|judet|ValoareEUR|^an|Type|unitate"
cols_student_expenditure <- paste0("Exp|", cols_student, "|Exp")
cols_full <- paste0(
  cols_teacher,
  "|Exp|ValoareEUR|Type|subject|mandatory|elective|disciplina|lb_romana|gpa|Exeprience|Category"
)
# Columns removed again after the keep-pattern has been applied.
cols_drop_derived <- "cls|scl|_ID_|opening"
cols_drop_teacher <- "_Level|Certification|Hometown|Long|County\\.|County_|ID|Inspection"

# readRDS() takes the file name as its first argument and has no `compress`
# argument; each file is read exactly once.
data_student_anon <- readRDS("data_student_anon") %>%
  select(matches(cols_student, perl = TRUE)) %>%
  select(-matches(cols_drop_derived, perl = TRUE)) %>%
  ungroup()

data_teacher_anon <- readRDS("data_teacher_anon") %>%
  select(matches(cols_teacher, perl = TRUE)) %>%
  select(-matches(cols_drop_teacher, perl = TRUE)) %>%
  ungroup()

data_expenditure_anon <- readRDS("data_expenditure_anon") %>%
  select(matches(cols_expenditure, perl = TRUE)) %>%
  ungroup()

data_student_teacher_anon <- readRDS("data_student_teacher_anon") %>%
  select(matches(cols_teacher, perl = TRUE)) %>%
  select(-matches(cols_drop_derived, perl = TRUE)) %>%
  ungroup()

data_student_expenditure_anon <- readRDS("data_student_expenditure_anon") %>%
  select(matches(cols_student_expenditure, perl = TRUE)) %>%
  select(-matches(cols_drop_derived, perl = TRUE)) %>%
  ungroup()

data_student_teacher_expenditure_anon <- readRDS("data_student_teacher_expenditure_anon") %>%
  select(matches(cols_full, perl = TRUE)) %>%
  select(-matches(cols_drop_derived, perl = TRUE)) %>%
  ungroup()

# The openings table is kept as is; it is read before it is saved.
openings_anon <- readRDS("openings_anon")
openings <- openings_anon

# Short aliases that were also defined by the earlier version of this section.
data_exp_anon <- data_expenditure_anon
data_student_exp_anon <- data_student_expenditure_anon
data_student_teacher_exp_anon <- data_student_teacher_expenditure_anon

# Write each subset back under the name it is later read from. The
# expenditure file now receives its subset, and the combined file keeps its
# "_anon" suffix.
saveRDS(data_student_anon, "data_student_anon", compress = TRUE)
saveRDS(data_teacher_anon, "data_teacher_anon", compress = TRUE)
saveRDS(data_expenditure_anon, "data_expenditure_anon", compress = TRUE)
saveRDS(data_student_teacher_anon, "data_student_teacher_anon", compress = TRUE)
saveRDS(data_student_expenditure_anon, "data_student_expenditure_anon", compress = TRUE)
saveRDS(
  data_student_teacher_expenditure_anon,
  "data_student_teacher_expenditure_anon",
  compress = TRUE
)
saveRDS(openings_anon, "openings_anon", compress = TRUE)
# Reload every data set from the final-data folder (no column filtering here).
setwd(wd_data_final)
data_student_raw <- readRDS("data_student_anon")
data_teacher_raw <- readRDS("data_teacher_anon")
data_exp_raw <- readRDS("data_expenditure_anon")
data_student_teacher_raw <- readRDS("data_student_teacher_anon")
data_student_exp_raw <- readRDS("data_student_expenditure_anon")
data_student_teacher_exp_raw <- readRDS("data_student_teacher_expenditure_anon")
openings <- readRDS("openings_anon")

# Project helper functions and the matching packages.
setwd(wd_code)
source("functions.R")
pacman::p_load(MatchIt, optmatch)
setwd(wd_data_final)

# Matching sample: rows with an_bac in 2008-2019 and a non-empty assigned
# high school, restricted to the columns used below.
data <- data_student_raw %>%
  filter(an_bac %in% 2008:2019, liceu_repartizat != "") %>%
  select(
    grad_perc, entrance_perc, dec_town, dec, n_hs_town_group,
    n_students_town_yr, n_hs_town, Unemployment_hs_bac, Wages_hs_bac,
    drop_hs_hs_bac,
    an, specializare_bac2, scoala_de_provenienta, media_la_admitere,
    class_mean,
    judet_bac, town, school_change, school_mean, quart_town,
    rezultat, judet_adm, Cod_SIIIR_hs_adm, unitate_de_invatamant,
    liceu_repartizat, Cod_SIRUTA2_hs_adm,
    specializare_adm, specializare_lb, an_bac
  ) %>%
  # grad_perc with missing values set to 0
  mutate(grad_perc_zero = ifelse(is.na(grad_perc), 0, grad_perc)) %>%
  # town-by-year means of the graduation and entrance percentiles
  group_by(town, an) %>%
  mutate(
    grad_perc_town = mean(grad_perc, na.rm = TRUE),
    entrance_perc_town = mean(entrance_perc, na.rm = TRUE),
    grad_perc_town_zero = mean(grad_perc_zero, na.rm = TRUE)
  ) %>%
  ungroup()
data <- as.data.frame(data)

# High-school-count bucket: counts below 4 keep their own label, 4 to 15
# become "4-15", above 15 "16+"; levels ordered by n_hs_town.
data$n_hs_town_group <- ifelse(
  data$n_hs_town > 15,
  "16+",
  ifelse(
    data$n_hs_town >= 4 & data$n_hs_town <= 15,
    "4-15",
    as.character(data$n_hs_town)
  )
)
data$n_hs_town_group <- reorder(data$n_hs_town_group, data$n_hs_town)

# Constants for the matching step.
groups <- c("1", "2", "3", "4-15", "16+")
student_bins <- c("25", "35", "75", "150")
scale <- c("5", "3", "2", "1", "1", "1", "1", "1", "1", "1")
years <- sort(unique(data$an_bac))
#match ----
# For every year, every pair of adjacent high-school-count groups and every
# town decile, match students from the larger group (treatment = 1) to
# students from the smaller group (treatment = 0) with MatchIt.
# Matched samples are collected in a pre-sized list and bound once at the
# end, instead of re-copying the growing data frame on every iteration.
ds <- c("1", "2", "3", "4", "5", "6", "7", "8", "9", "10")
n_comparisons <- length(groups) - 1
matched_samples <- vector(
  "list",
  length(years) * n_comparisons * length(ds)
)
n_matched <- 0L

for (yr in years) {
  print(sprintf("Year: %.0f", yr))
  # NOTE(review): `years` is taken from an_bac while rows are filtered on
  # `an` - confirm the two columns are meant to be interchangeable here.
  data_temp_yr <- data %>%
    filter(an == yr)
  for (i in seq_len(n_comparisons)) {
    n <- groups[i]
    n_1 <- groups[i + 1]
    print(sprintf("High Schools: %s", n))
    for (decile in seq_along(ds)) {
      d <- ds[decile]
      print(sprintf("Decile: %s", d))
      data_temp <- data_temp_yr %>%
        ungroup() %>%
        filter(dec_town == d) %>%
        filter(n_hs_town_group %in% c(n, n_1)) %>%
        mutate(treatment = ifelse(n_hs_town_group %in% (n_1), 1, 0)) %>%
        filter(
          !is.na(entrance_perc) &
            !is.na(n_students_town_yr) &
            !is.na(an)
        )
      # Nearest-neighbour Mahalanobis matching with replacement; calipers
      # are in raw units, and the cohort-size caliper depends on the group
      # pair (i) and the decile.
      x <- matchit(
        treatment ~ entrance_perc + n_students_town_yr + entrance_perc_town,
        data = data_temp,
        method = "nearest",
        distance = "mahalanobis",
        caliper = c(
          entrance_perc = 0.01,
          n_students_town_yr = 35 * 2^i * 1.03^(-decile * i),
          entrance_perc_town = 0.05
        ),
        std.caliper = FALSE,
        replace = TRUE
      )
      data_temp <- match.data(x)
      n_matched <- n_matched + 1L
      matched_samples[[n_matched]] <- data_temp
    }
  }
}
# Leading with an empty data.frame keeps the result a plain data.frame, as
# when the rows were appended one cell at a time.
data_reg <- bind_rows(c(list(data.frame()), matched_samples))
# Summary of the last matching cell only.
summary(x)
# Persist the matched sample and read it straight back.
setwd(wd_data_final)
saveRDS(data_reg, "data_matching_nearest_loose_anon")
data_reg <- readRDS("data_matching_nearest_loose_anon")

# Comparison index: 1 = "1" vs "2", 2 = "2" vs "3", 3 = "3" vs "4-15",
# 4 = "4-15" vs "16+". The control side is the smaller group, the treated
# side the larger one; any other combination yields NA.
data_reg <- mutate(
  data_reg,
  group = case_when(
    (treatment == 0 & n_hs_town_group == "1") |
      (treatment == 1 & n_hs_town_group == "2") ~ 1,
    (treatment == 0 & n_hs_town_group == "2") |
      (treatment == 1 & n_hs_town_group == "3") ~ 2,
    (treatment == 0 & n_hs_town_group == "3") |
      (treatment == 1 & n_hs_town_group == "4-15") ~ 3,
    (treatment == 0 & n_hs_town_group == "4-15") |
      (treatment == 1 & n_hs_town_group == "16+") ~ 4
  )
)

# Cluster id: one id per (year, entrance percentile, comparison) cell.
data_reg <- data_reg %>%
  group_by(an, entrance_perc, group) %>%
  mutate(id = cur_group_id()) %>%
  ungroup()

# grad_perc with missing values set to 0.
data_reg <- mutate(
  data_reg,
  grad_perc_zero = ifelse(is.na(grad_perc), 0, grad_perc)
)

# Town-by-year means, recomputed on the matched sample.
data_reg <- data_reg %>%
  group_by(town, an) %>%
  mutate(
    grad_perc_town = mean(grad_perc, na.rm = TRUE),
    entrance_perc_town = mean(entrance_perc, na.rm = TRUE),
    grad_perc_town_zero = mean(grad_perc_zero, na.rm = TRUE)
  ) %>%
  ungroup()
#full model ----
# Three specifications, each estimated on the full matched sample and on the
# first comparison only (group == 1). All models use the matching weights and
# cluster on `id`.

# Graduation percentile, treatment interacted with town decile,
# year and track fixed effects.
fml_grad <- I(grad_perc*100) ~ entrance_perc + treatment*dec_town + dec_town +
  dec_town*n_students_town_yr + Unemployment_hs_bac*dec_town +
  Wages_hs_bac*dec_town + drop_hs_hs_bac*dec_town |
  as.factor(an) + specializare_bac2

# School mean, treatment interacted with town decile,
# plus school-of-origin and town fixed effects.
fml_school_mean <- I(school_mean*100) ~ entrance_perc + treatment*dec_town +
  dec_town + dec_town*n_students_town_yr + Unemployment_hs_bac*dec_town +
  Wages_hs_bac*dec_town + drop_hs_hs_bac*dec_town |
  as.factor(an) + specializare_bac2 + scoala_de_provenienta + town

# Graduation percentile, treatment entering without the decile interaction,
# same four fixed effects as the school-mean model.
fml_grad_tide <- I(grad_perc*100) ~ entrance_perc + treatment + dec_town +
  dec_town*n_students_town_yr + Unemployment_hs_bac*dec_town +
  Wages_hs_bac*dec_town + drop_hs_hs_bac*dec_town |
  as.factor(an) + specializare_bac2 + scoala_de_provenienta + town

# Subsample for the first comparison.
data_reg_group_1 <- data_reg %>%
  filter(group == 1)

model_grad <- feols(
  fml_grad,
  cluster = ~id, weights = ~weights, data = data_reg
)
summary(model_grad)

model_grad_1 <- feols(
  fml_grad,
  cluster = ~id, weights = ~weights, data = data_reg_group_1
)
summary(model_grad_1)

model_school_mean_1 <- feols(
  fml_school_mean,
  cluster = ~id, weights = ~weights, data = data_reg_group_1
)
summary(model_school_mean_1)

model_school_mean <- feols(
  fml_school_mean,
  cluster = ~id, weights = ~weights, data = data_reg
)
summary(model_school_mean)

model_grad_1_tide <- feols(
  fml_grad_tide,
  cluster = ~id, weights = ~weights, data = data_reg_group_1
)
summary(model_grad_1_tide)

model_grad_tide <- feols(
  fml_grad_tide,
  cluster = ~id, weights = ~weights, data = data_reg
)
summary(model_grad_tide)
# Format a number with two decimals and a comma as thousands separator.
f <- function(x) formatC(x, format = "f", digits = 2, big.mark = ",")
options(modelsummary_format_numeric_latex = "plain")

# Write next to the script currently open in RStudio.
current_path <- rstudioapi::getActiveDocumentContext()$path
setwd(dirname(current_path))

# Column order of the table: school mean, graduation (interacted treatment),
# graduation (plain treatment); each as first comparison, then all.
matching_models <- list(
  "2 vs 1" = model_school_mean_1,
  "n vs n-1" = model_school_mean,
  "2 vs 1" = model_grad_1,
  "n vs n-1" = model_grad,
  "2 vs 1" = model_grad_1_tide,
  "n vs n-1" = model_grad_tide
)
matching_table <- modelsummary(
  matching_models,
  statistic = "std.error",
  estimate = "{estimate}{stars}",
  stars = c("^{*}" = 0.1, "^{**}" = 0.05, "^{***}" = 0.01),
  output = "latex",
  #gof_map=metrics,
  gof_omit = "AIC|R2 Pseudo|R2 Adj|R2 Within|BIC|Log.Lik.|Sigma",
  coef_omit = "none",
  #coef_rename=variables,
  fmt = 2,
  escape = FALSE
)

# The table is printed to the console and its LaTeX source written to file.
fileConn <- file("a_matching.txt")
writeLines(print(matching_table), fileConn)
close(fileConn)
# Print the fit statistics of every model, in table column order.
fitted_models <- list(
  model_school_mean_1,
  model_school_mean,
  model_grad_1,
  model_grad,
  model_grad_1_tide,
  model_grad_tide
)
for (fit in fitted_models) {
  print(r2(fit))
}
# r2() of type 'ar2' for the same models, as one vector.
unlist(lapply(fitted_models, r2, type = "ar2"))
